#knitr::opts_chunk$set(echo = FALSE, error=TRUE, message=FALSE, warning=FALSE)
library(tidyverse)     # for data cleaning and plotting
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)     # for date manipulation
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(openintro)     # for the abbr2state() function
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(maps)          # for map data
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(ggmap)         # for mapping points on maps
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(gplots)        # for col2hex() function
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(RColorBrewer)  # for color palettes
library(sf)            # for working with spatial data
## Linking to GEOS 3.9.1, GDAL 3.4.0, PROJ 8.1.1; sf_use_s2() is TRUE
library(leaflet)       # for highly customizable mapping
library(ggthemes)      
library(plotly)        # for the ggplotly() - basic interactivity
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
## 
##     wind
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(gganimate)     # for adding animation layers to ggplots
library(gifski)        # for creating the gif (don't need to load this library every time,but need it installed)
library(transformr)    # for "tweening" (gganimate)
## 
## Attaching package: 'transformr'
## The following object is masked from 'package:sf':
## 
##     st_normalize
library(shiny)         # for creating interactive apps
library(patchwork)     # for nicely combining ggplot2 graphs
library(gt)            # for creating nice tables
## 
## Attaching package: 'gt'
## The following object is masked from 'package:openintro':
## 
##     sp500
library(countrycode)
theme_set(theme_minimal())
library(tidytext)

Every four years (or two), the world holds its breath as individual humans represent whole countries through their mastery of a sport. We data scientists have analyzed the Summer Olympics from the year 1896 to 2016–yes, 120 years of the top athletes competing for the glory, and, perhaps more importantly, the gold.

Original Data

This data hails from Kaggle from the games in Athens 1896 to Rio 2016; more recently, it was popularized through the weekly R challenge “Tidy Tuesday.” Until 1992, the Winter and Summer Games coincided in the same year, which results in odd data points like a Figure Skating victory slipping into the Summer Olympics data sets. Other aspects of the history of the Olympics include the existence and end to whole nations as the years progress. Examples include East and West Germany and the Soviet Union–as well as unrecognized independent territories such as Kosovo and Taiwan. Our team’s methodology to eliminate our own biases was to keep the data points that reflect 2016’s Olympic Games for sports measured and the countries recognized are universally agreed upon by the international community.

olympics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-07-27/olympics.csv')
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, sex, team, noc, games, season, city, sport, event, medal
## dbl  (5): id, age, height, weight, year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
world <- map_data("world")

medal_colors <- c(
  Gold = "#d5a500",
  Silver = "#b7b7b7",
  Bronze = "#a17419")

Filtered Data Sets

summer_olympics <- olympics %>% 
  filter(season == "Summer") %>% 
   mutate(countryName = countrycode(noc, "genc3c", "country.name")) %>% 
   mutate(countryName = ifelse(countryName == "United States", "USA", countryName)) %>% 
   mutate(countryName = ifelse(noc == "ALG", "Algeria", countryName)) %>% 
   mutate(countryName = ifelse(noc == "BAH", "Bahamas", countryName)) %>%
   mutate(countryName = ifelse(noc == "BUL", "Bulgaria", countryName)) %>%
   mutate(countryName = ifelse(noc == "CHI", "Chile", countryName)) %>%
   mutate(countryName = ifelse(noc == "CRC", "Costa Rica", countryName)) %>%
   mutate(countryName = ifelse(noc == "CRO", "Croatia", countryName)) %>%
   mutate(countryName = ifelse(noc == "CZA", "Czech Republic", countryName)) %>%
   mutate(countryName = ifelse(noc == "DEN", "Denmark", countryName)) %>%
   mutate(countryName = ifelse(noc == "FIJ", "Fiji", countryName)) %>%
   mutate(countryName = ifelse(noc == "GER", "Germany", countryName)) %>%
   mutate(countryName = ifelse(noc == "GRE", "Greece", countryName)) %>%
   mutate(countryName = ifelse(noc == "GRN", "Grenada", countryName)) %>%
   mutate(countryName = ifelse(noc == "HAI", "Haiti", countryName)) %>%
   mutate(countryName = ifelse(noc == "INA", "Indonesia", countryName)) %>%
   mutate(countryName = ifelse(noc == "IRI", "Iran", countryName)) %>%
   mutate(countryName = ifelse(noc == "LAT", "Latvia", countryName)) %>%
   mutate(countryName = ifelse(noc == "MGL", "Mongolia", countryName)) %>%
   mutate(countryName = ifelse(noc == "NED", "Netherlands", countryName)) %>%
   mutate(countryName = ifelse(noc == "NGR", "Nigeria", countryName)) %>%
   mutate(countryName = ifelse(noc == "POR", "Portugal", countryName)) %>%
   mutate(countryName = ifelse(noc == "PUR", "Puerto Rico", countryName)) %>%
   mutate(countryName = ifelse(noc == "RSA", "South Africa", countryName)) %>%
   mutate(countryName = ifelse(noc == "SLO", "Slovenia", countryName)) %>%
   mutate(countryName = ifelse(noc == "SUI", "Switzerland", countryName)) %>%
   mutate(countryName = ifelse(noc == "UAE", "United Arab Emirates", countryName)) %>%
   mutate(countryName = ifelse(noc == "URU", "Uruguay", countryName)) %>%
   mutate(countryName = ifelse(noc == "VIE", "Vietnam", countryName)) %>%
   mutate(countryName = ifelse(noc == "ZIM", "Zimbabwe", countryName)) 
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: AHO, ALG, ANG, ANT, ANZ, ARU, ASA, BAH, BAN, BAR, BER, BHU, BIZ, BOH, BOT, BRU, BUL, BUR, CAM, CAY, CGO, CHA, CHI, CRC, CRO, CRT, DEN, ESA, EUN, FIJ, FRG, GAM, GBS, GDR, GEQ, GER, GRE, GRN, GUA, GUI, HAI, HON, INA, IOA, IRI, ISV, IVB, KOS, KSA, KUW, LAT, LBA, LES, LIB, MAD, MAL, MAS, MAW, MGL, MON, MRI, MTN, MYA, NBO, NCA, NED, NEP, NFL, NGR, NIG, OMA, PAR, PHI, PLE, POR, PUR, RHO, ROT, RSA, SAA, SAM, SCG, SEY, SKN, SLO, SOL, SRI, SUD, SUI, TAN, TCH, TGA, TOG, TPE, UAE, UAR, UNK, URS, URU, VAN, VIE, VIN, WIF, YAR, YMD, YUG, ZAM, ZIM
# Footnote or mention in part of the project the choice to leave out territories not fully recognized, such as Kosovo and Chinese Taipei, and countries that no longer exist, including the Soviet Union and Yugoslavia. 


summer_gold <- summer_olympics %>% 
  drop_na(medal) %>% 
  filter(medal == "Gold") 

summer_medals <- summer_olympics %>% 
  drop_na(medal)

medal_prop <- summer_olympics %>% 
  count(countryName, medal) %>% 
  group_by(countryName) %>% 
  filter(any(!is.na(medal))) %>% 
  mutate(medal = ifelse(is.na(medal), "No Medal", medal)) %>% 
  pivot_wider(
    names_from = medal,
    values_from = n,
    values_fill = 0) %>% 
  mutate(
    total_medals = Gold + Silver + Bronze,
    total_events = total_medals + `No Medal`,
    p_medal = total_medals / total_events) %>% 
  arrange(desc(total_medals))

Female-only, medal-winning data:

Instructions that might be useful to remember:

Felicia’s Graphs

1. Overall medals (gold, silver, and bronze) won by Olympians

This bar graph shows the descending order of overall Olympic medals won based on gender. This feflects women not being allowed in competitions for a good portion of all Olympic Games. This graph is also skews heavily towards sports that naturally have more medals to win due to different events under the same sport category.

In general, the net amount of medals won is favored by sports within which have multiple opportunities to medal. One example includes Athletics, also known as “Track and Field,” having multiple races, team relays, and field events for athletes to earn medals in. Other result by contrasting by gender shows both the lack of female inclusion for a significant period of time, reflecting gender dynamics over the decades, as well as sports that are more dominated by one gender. Synchronized Swimming, Rhythmic Gymnastics appear to only have female champions, while Wrestling, the Modern Pentathlon, Rugby, and Lacrosse appear to have majority male winners.

2. Olympic medal winning women’s heights and weights

## # A tibble: 8,665 × 16
##       id name    sex     age height weight team   noc   games  year season city 
##    <dbl> <chr>   <chr> <dbl>  <dbl>  <dbl> <chr>  <chr> <chr> <dbl> <chr>  <chr>
##  1    37 Ann Kr… F        23    182     64 Norway NOR   1996…  1996 Summer Atla…
##  2    65 Patima… F        21    165     49 Azerb… AZE   2016…  2016 Summer Rio …
##  3    67 Mariya… F        22    179     80 Russia RUS   2008…  2008 Summer Beij…
##  4    90 Tamila… F        21    163     60 Russia RUS   2004…  2004 Summer Athi…
##  5   153 Monica… F        23    191     88 Unite… USA   2008…  2008 Summer Beij…
##  6   165 Nia Ni… F        20    175     56 Unite… USA   2004…  2004 Summer Athi…
##  7   259 Reema … F        21    173     59 Canada CAN   1984…  1984 Summer Los …
##  8   394 Irene … F        19    160     48 East … GDR   1972…  1972 Summer Muni…
##  9   395 Jennif… F        20    160     62 Canada CAN   2012…  2012 Summer Lond…
## 10   428 Elvan … F        25    159     40 Turkey TUR   2008…  2008 Summer Beij…
## # … with 8,655 more rows, and 4 more variables: sport <chr>, event <chr>,
## #   medal <chr>, countryName <chr>

This graph shows the heights and weights of women who competed in the Olympics and won a medal.

Above is a graph that shows all the female athletes in one graph, showing a wide range of heights and weights of Olympians. However, due to the amount of sports, the graph itself is very noisy. To better understand the trends, the same data is compiled into four different categories.

The first category is “Holding an object”: These athletes are holding onto something, whether it is the reigns of a horse or a racket, they are armed.

The second is “Wearing protective gear.” The athletes in this category are wearing an extra layer designed for their sport to reduce injury.

The third is “In water.” As it sounds, these athletes are found in the water to compete.

Lastly, there is “Just self.” These athletes are not holding anything, wearing something meant to protect them, or are in the water.

## Warning in sport == c("Badminton", "Cycling", "Equestrianism", "Golf",
## "Hockey", : longer object length is not a multiple of shorter object length
## # A tibble: 133 × 16
##       id name    sex     age height weight team   noc   games  year season city 
##    <dbl> <chr>   <chr> <dbl>  <dbl>  <dbl> <chr>  <chr> <chr> <dbl> <chr>  <chr>
##  1  1169 "Maril… F        33    172     58 Nethe… NED   2012…  2012 Summer Lond…
##  2  1546 "Eirin… F        17    167     45 Greece GRE   2000…  2000 Summer Sydn…
##  3  4418 "Alyso… F        27    162     65 Austr… AUS   2000…  2000 Summer Sydn…
##  4  6624 "Diana… F        33    175     85 Italy  ITA   2016…  2016 Summer Rio …
##  5 11448 "Fiona… F        40    173     75 Great… GBR   2016…  2016 Summer Rio …
##  6 12113 "Elisa… F        16    168     49 Italy  ITA   2004…  2004 Summer Athi…
##  7 14578 "Jeane… F        26    169     60 Great… GBR   2000…  2000 Summer Sydn…
##  8 15540 "Joann… F        28    174     72 Austr… AUS   2000…  2000 Summer Sydn…
##  9 17479 "Els C… F        30    177     58 Belgi… BEL   2000…  2000 Summer Sydn…
## 10 22054 "Mario… F        32    170     60 France FRA   1996…  1996 Summer Atla…
## # … with 123 more rows, and 4 more variables: sport <chr>, event <chr>,
## #   medal <chr>, countryName <chr>
## Warning in sport == c("Boxing", "Judo", "Fencing", "Football", "Taekwondo", :
## longer object length is not a multiple of shorter object length
## # A tibble: 152 × 16
##       id name    sex     age height weight team   noc   games  year season city 
##    <dbl> <chr>   <chr> <dbl>  <dbl>  <dbl> <chr>  <chr> <chr> <dbl> <chr>  <chr>
##  1  1191 "Judit… F        27    164     62 Hunga… HUN   1964…  1964 Summer Tokyo
##  2  1730 "Monic… F        26    183     74 Unite… USA   2016…  2016 Summer Rio …
##  3  2769 "Kimia… F        18    185     57 Iran   IRI   2016…  2016 Summer Rio …
##  4  3593 "An Ku… F        28    160     52 North… PRK   2008…  2008 Summer Beij…
##  5  4101 "Andri… F        26    175     65 Brazil BRA   2004…  2004 Summer Athi…
##  6  5631 "Kosov… F        27    166     56 Sweden SWE   2016…  2016 Summer Rio …
##  7  7597 "Bao Y… F        24    172     67 China  CHN   2008…  2008 Summer Beij…
##  8  7717 "Brbar… F        20    171     71 Brazil BRA   2008…  2008 Summer Beij…
##  9  7920 "Valri… F        27    165     53 France FRA   1996…  1996 Summer Atla…
## 10  8364 "Saski… F        33    170     60 Germa… GER   2016…  2016 Summer Rio …
## # … with 142 more rows, and 4 more variables: sport <chr>, event <chr>,
## #   medal <chr>, countryName <chr>
## Warning in sport == c("Canoeing", "Diving", "Water Polo", "Rowing",
## "Swimming", : longer object length is not a multiple of shorter object length
## # A tibble: 406 × 16
##       id name    sex     age height weight team   noc   games  year season city 
##    <dbl> <chr>   <chr> <dbl>  <dbl>  <dbl> <chr>  <chr> <chr> <dbl> <chr>  <chr>
##  1   395 "Jenni… F        20    160     62 Canada CAN   2012…  2012 Summer Lond…
##  2   980 "Rebec… F        23    179     70 Great… GBR   2012…  2012 Summer Lond…
##  3  1380 "Chris… F        22    172     52 Unite… USA   1992…  1992 Summer Barc…
##  4  2443 "Joan … F        17    170     59 Unite… USA   1952…  1952 Summer Hels…
##  5  2464 "Jo Qe… F        30    171     58 New Z… NZL   2016…  2016 Summer Rio …
##  6  2575 "Lisa … F        27    172     58 Canada CAN   1996…  1996 Summer Atla…
##  7  3070 "Malin… F        23    180     64 Sweden SWE   2000…  2000 Summer Sydn…
##  8  3880 "Agnet… F        31    172     68 Sweden SWE   1992…  1992 Summer Barc…
##  9  4393 "Herta… F        26    183     82 Roman… ROU   1988…  1988 Summer Seoul
## 10  4720 "Tessa… F        27    177     71 Nethe… NED   2000…  2000 Summer Sydn…
## # … with 396 more rows, and 4 more variables: sport <chr>, event <chr>,
## #   medal <chr>, countryName <chr>
## Warning in sport == c("Athletics", "Basketball", "Beach Volleyball",
## "Gymnastics", : longer object length is not a multiple of shorter object length
## # A tibble: 353 × 16
##       id name     sex     age height weight team  noc   games  year season city 
##    <dbl> <chr>    <chr> <dbl>  <dbl>  <dbl> <chr> <chr> <chr> <dbl> <chr>  <chr>
##  1   428 Elvan A… F        25    159     40 Turk… TUR   2008…  2008 Summer Beij…
##  2   610 Ginko A… F        26    148     46 Japan JPN   1964…  1964 Summer Tokyo
##  3  1020 Adriana… F        29    180     61 Braz… BRA   2000…  2000 Summer Sydn…
##  4  1071 Kseniya… F        20    158     48 Russ… RUS   2012…  2012 Summer Lond…
##  5  1254 Macaren… F        27    170     67 Spain ESP   2012…  2012 Summer Lond…
##  6  1313 Robyn M… F        32    172     67 Unit… USA   2008…  2008 Summer Beij…
##  7  1564 Chioma … F        24    164     57 Nige… NGR   1996…  1996 Summer Atla…
##  8  1732 Liudmyl… F        29    165     58 Sovi… URS   1976…  1976 Summer Mont…
##  9  2546 Alessan… F        22    200     85 Braz… BRA   1996…  1996 Summer Atla…
## 10  3281 Simona … F        16    158     44 Roma… ROU   1996…  1996 Summer Atla…
## # … with 343 more rows, and 4 more variables: sport <chr>, event <chr>,
## #   medal <chr>, countryName <chr>

This is a series of graphs that filter out sports into four distinct categories.

3. World map showing the proportion of Olympic medal wins to participation in events

This world map shows the proportion of Olympic medal wins to participation in events per country. The United States has the largest proportion of medal earnings to individual participation in the Olympics, followed by Russia, Germany, and Finland.

The map above adds depth to our understanding of Olympic medal winning countries. Previous maps show an extreme domination in the Olympics by the United States of America, but taking the wins in relation to total participation, other countries emerge as prolific medal winners as well. Russia, Finland, Germany, and China also tend to win more often when their athletes are competing. The United States still is at the highest level of medal wins. One other aspect to note are all the medals won by countries that no longer exist today, like the USSR and East and West Germany. This graph tells a more accurate story of where champions are from, but cannot wholly depict the wins every achieved in the world.

##Gabby’s Graph

## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: ALG, ANZ, BAH, BUL, CHI, CRC, CRO, DEN, EUN, FIJ, FRG, GDR, GER, GRE, GRN, HAI, INA, IOA, IRI, KOS, LAT, MGL, NED, NGR, POR, PUR, RSA, SCG, SLO, SUI, TCH, TPE, UAE, URS, URU, VIE, YUG, ZIM

4. Which countries have won the most gold medals?

This graphs is a basic bar graph that shows us the number of gold medals earned by each participating country

The map above gives us a basic idea of how many gold medals are earned per country in the olympics. It’s very obvious that the United States outnumber every other country by far with the second and third place countries having over 1000 less.As the numbers of gold medals won decrease, it becomes harder to distinguish the count. While this graph gives us an idea of what countries dominate the summer olympics, we have little details on what events they are winning them in.

5. When looking at countries that have won more than 20 gold medals, which sports have won the most gold medals for each country?

This is a series of graphs focusing on only countries that have won more than 20 gold medals. Each country has a graph displaying which events have won the most gold medals in descending order

Above is a series of 34 graphs, each graph represents a country that has won more than 20 gold medals, and displays what events they have received the gold medals in. These graphs give us a detailed look on the events that are winning the most gold medals for each country that we are looking at. These graphs allow us to compare and contrast the countries since they are all in one place, and easy to distinguish the sports because not only did we color code them, we included the names of them on the y-axis to make it easier. It’s interesting to see which countries have won gold medals in only one sport, and what sport that is. By the same token, these graphs are multipurposeful because they show us how many sports each country has won gold medals in, and the distribution of those medals as well.

## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: AHO, ALG, ANG, ANT, ANZ, ARU, ASA, BAH, BAN, BAR, BER, BHU, BIZ, BOH, BOT, BRU, BUL, BUR, CAM, CAY, CGO, CHA, CHI, CRC, CRO, CRT, DEN, ESA, EUN, FIJ, FRG, GAM, GBS, GDR, GEQ, GER, GRE, GRN, GUA, GUI, HAI, HON, INA, IOA, IRI, ISV, IVB, KOS, KSA, KUW, LAT, LBA, LES, LIB, MAD, MAL, MAS, MAW, MGL, MON, MRI, MTN, MYA, NBO, NCA, NED, NEP, NFL, NGR, NIG, OMA, PAR, PHI, PLE, POR, PUR, RHO, ROT, RSA, SAA, SAM, SCG, SEY, SKN, SLO, SOL, SRI, SUD, SUI, TAN, TCH, TGA, TOG, TPE, UAE, UAR, UNK, URS, URU, VAN, VIE, VIN, WIF, YAR, YMD, YUG, ZAM, ZIM

6. What does the distribution of medals look like for the 10 highest medal earning countries?

Below is a pair of graphs. The first graph shows the proportion of medals won by the top 10 countries . The second graph shows the number of medals won by the top 10 countries

Above are two graphs dealing with a similar concept, looking at earned medals as a whole for the 10 highest winning countries. Previous graphs have only focused on gold medals, while these two graphs bring in all three medals, providing us with a bigger picture. The first graph, the plotly, displays the distribution of medals won by the top 10 countries. Since this graph is proportional we can compare the amount of medals earned by countries by the percentage distribution of each medal. The second graph, gives us similar information but doesn’t show the number of medals as proportions. Here we just see how many of each type of medal each of our top countries have won, in descending order. We can still somewhat estimate the proportion of each medal for each country but here we see which countries have the most medals once again.